load packages

library(tidyverse)
library(glmnet)
library(caTools)
library(caret)
library(ROCR)

load coded data

coded = read.csv("~/Documents/code/dsnlab/automotion-test-set/tds_artifact_coded_volumes.csv")

load stripe detection data

fileDir = "~/Documents/code/dsnlab/automotion-test-set/output/"
filePattern = "tds_stripes_.*.csv"
  
file_list = list.files(fileDir, pattern = filePattern)

for (file in file_list){
  # if the merged dataset doesn't exist, create it
  if (!exists("stripes")){
    temp = read.csv(paste0(fileDir,file))
    stripes = data.frame(temp) %>% 
      rename("volume" = t,
             "subjectID" = subject) %>%
      select(-file)
    rm(temp)
  }
  
  # if the merged dataset does exist, append to it
  else {
    temp_dataset = read.csv(paste0(fileDir,file))
    temp_dataset = data.frame(temp_dataset) %>% 
      rename("volume" = t,
             "subjectID" = subject) %>%
      select(-file)
    stripes = rbind(stripes, temp_dataset)
    rm(temp_dataset)
  }
}

load global intensities and rps

# define paths and variables
rpDir = '~/Documents/code/dsnlab/automotion-test-set/rp_txt/'
outputDir = '~/Documents/code/tds_auto-motion/auto-motion-output/'
plotDir = '~/Documents/code/tds_auto-motion/auto-motion-output/plots/'
study = "tds2"
rpPattern = "^rp_([0-9]{3})_(.*).txt"
rpCols = c("euclidian_trans","euclidian_rot","euclidian_trans_deriv","euclidian_rot_deriv","trash.rp")

# global intensities
intensities = read.csv(paste0(outputDir,study,'_globalIntensities.csv'))

# edit volume numbers for subject 157, stop3
intensities = intensities %>% 
  mutate(volume = ifelse(subjectID == 157 & run == "stop3" & volume > 43, volume - 1, volume))

# rp files
file_list = list.files(rpDir, pattern = rpPattern)

for (file in file_list){
  # if the merged dataset doesn't exist, create it
  if (!exists("rp")){
    temp = read.table(paste0(rpDir,file))
    colnames(temp) = rpCols
    rp = data.frame(temp, file = rep(file,count(temp))) %>% 
      mutate(volume = row_number()) %>%
      extract(file,c("subjectID","run"), rpPattern) %>%
      mutate(subjectID = as.integer(subjectID))
    rm(temp)
  }
  
  # if the merged dataset does exist, append to it
  else {
    temp_dataset = read.table(paste0(rpDir,file))
    colnames(temp_dataset) = rpCols
    temp_dataset = data.frame(temp_dataset, file = rep(file,count(temp_dataset))) %>% 
      mutate(volume = row_number()) %>%
      extract(file,c("subjectID","run"), rpPattern) %>%
      mutate(subjectID = as.integer(subjectID))
    rp = rbind(rp, temp_dataset)
    rm(temp_dataset)
  }
}

join dataframes

joined = left_join(stripes, coded, by = c("subjectID", "run", "volume")) %>%
  left_join(., intensities, by = c("subjectID", "run", "volume")) %>%
  left_join(., rp, by = c("subjectID", "run", "volume")) %>%
  mutate(striping = ifelse(is.na(striping), 0, striping),
         intensity = ifelse(is.na(intensity), 0, intensity),
         tile = paste0("tile_",tile),
         artifact = ifelse(striping > 1, 1, 0)) %>%
  group_by(subjectID, run, tile) %>%
  mutate(Diff.mean = volMean - lag(volMean),
         Diff.sd = volSD - lag(volSD)) %>%
  spread(tile, freqtile_power)

split the data

set.seed(101) 
sample = sample.split(joined$artifact, SplitRatio = .75)
training = subset(joined, sample == TRUE)
testing = subset(joined, sample == FALSE)

machine learning

use lasso logistic regression to fit beta weights for each predictor

# tidy data
train.ml = training %>%
  group_by(subjectID, run) %>%
  mutate(Diff.mean = ifelse(is.na(Diff.mean),0,Diff.mean),
         Diff.sd = ifelse(is.na(Diff.sd),0,Diff.sd)) %>%
  gather(tile,freqtile_power, starts_with("tile")) %>%
  mutate(tile = paste0(tile,"_c")) %>%
  group_by(subjectID, run, tile) %>%
  mutate(freqtile_power_c = freqtile_power - mean(freqtile_power, na.rm=TRUE)) %>%
  ungroup() %>%
  select(-freqtile_power) %>%
  spread(tile,freqtile_power_c) %>%
  select(-striping, - intensity, -trash.rp, -fsl.volume, -volMean, -volSD, -euclidian_rot, -euclidian_trans) %>%
  select(subjectID, run, volume, artifact, everything())

test.ml = testing %>%
  group_by(subjectID, run) %>%
  mutate(Diff.mean = ifelse(is.na(Diff.mean),0,Diff.mean),
         Diff.sd = ifelse(is.na(Diff.sd),0,Diff.sd)) %>%
  gather(tile,freqtile_power, starts_with("tile")) %>%
  mutate(tile = paste0(tile,"_c")) %>%
  group_by(subjectID, run, tile) %>%
  mutate(freqtile_power_c = freqtile_power - mean(freqtile_power, na.rm=TRUE)) %>%
  ungroup() %>%
  select(-freqtile_power) %>%
  spread(tile,freqtile_power_c) %>%
  select(-striping, - intensity, -trash.rp, -fsl.volume, -volMean, -volSD, -euclidian_rot, -euclidian_trans) %>%
  select(subjectID, run, volume, artifact, everything())

# subset predictors and criterion
x_train = as.matrix(train.ml[,-c(1,2,3,4)])
y_train = as.double(as.matrix(train.ml[, 4]))

# run xval to determine lambda
cv.train <- cv.glmnet(x_train, y_train, family='binomial', alpha=1, parallel=TRUE, standardize=TRUE, type.measure='auc')

plot(cv.train)

plot(cv.train$glmnet.fit, xvar="lambda", label=TRUE)

cv.train$lambda.min
## [1] 0.0001055024
cv.train$lambda.1se
## [1] 0.01331289
coef(cv.train, s=cv.train$lambda.min)
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                    1
## (Intercept)              -5.34966479
## euclidian_trans_deriv    -1.96821531
## euclidian_rot_deriv       1.25812915
## Diff.mean                 0.01511855
## Diff.sd                   0.03814063
## tile_1_c                -68.96785531
## tile_10_c              7750.81268518
## tile_11_c             -5214.91044479
## tile_2_c               -349.90077443
## tile_3_c                  .         
## tile_4_c              -1015.84159332
## tile_5_c              -1210.50826519
## tile_6_c                176.50824285
## tile_7_c               1794.76700119
## tile_8_c               3217.08847243
## tile_9_c               -811.96646874
coef(cv.train, s=cv.train$lambda.1se)
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                                   1
## (Intercept)             -4.24641815
## euclidian_trans_deriv    .         
## euclidian_rot_deriv      .         
## Diff.mean                .         
## Diff.sd                  0.01718502
## tile_1_c               -17.31491278
## tile_10_c             3019.45382532
## tile_11_c                .         
## tile_2_c                 .         
## tile_3_c                 .         
## tile_4_c                 .         
## tile_5_c                 .         
## tile_6_c                 .         
## tile_7_c                 .         
## tile_8_c                 .         
## tile_9_c                 .
# test on sample
pred_train = predict(cv.train, newx = x_train, s=cv.train$lambda.1se, type="response")

# plot cutoff v. accuracy
predicted = prediction(pred_train, y_train, label.ordering = NULL)
perf = performance(predicted, measure = "acc")
perf.df = data.frame(cut=perf@x.values[[1]],acc=perf@y.values[[1]])

ggplot(perf.df, aes(cut, acc)) +
  geom_line()

# plot false v. true positive rate
perf = performance(predicted, measure = "tpr", x.measure = "fpr")
perf.df = data.frame(cut=perf@alpha.values[[1]],fpr=perf@x.values[[1]],tpr=perf@y.values[[1]])

ggplot(perf.df, aes(fpr, tpr)) +
  geom_line()

# plot specificity v. sensitivity
perf = performance(predicted, measure = "sens", x.measure = "spec")
perf.df = data.frame(cut=perf@alpha.values[[1]],sens=perf@x.values[[1]],spec=perf@y.values[[1]])
ggplot(perf.df, aes(spec, sens)) +
  geom_line()

ggplot(perf.df, aes(x = cut)) +
  geom_line(aes(y = sens, color = "sensitivity")) + 
  geom_line(aes(y = spec, color = "specificity"))

cut = perf@alpha.values[[1]][which.max(perf@x.values[[1]]+perf@y.values[[1]])]
ss = max(perf@x.values[[1]]+perf@y.values[[1]]) # sensitivity + specificity

# confusion matrix
pred_train = predict(cv.train, newx = x_train, s=cv.train$lambda.1se, type="response")
pred_train[pred_train > .03] = 1
pred_train[pred_train < .03] = 0
confusionMatrix(pred_train, y_train)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 5530   34
##          1  127   85
##                                             
##                Accuracy : 0.9721            
##                  95% CI : (0.9675, 0.9762)  
##     No Information Rate : 0.9794            
##     P-Value [Acc > NIR] : 0.9999            
##                                             
##                   Kappa : 0.5004            
##  Mcnemar's Test P-Value : 0.0000000000004149
##                                             
##             Sensitivity : 0.9775            
##             Specificity : 0.7143            
##          Pos Pred Value : 0.9939            
##          Neg Pred Value : 0.4009            
##              Prevalence : 0.9794            
##          Detection Rate : 0.9574            
##    Detection Prevalence : 0.9633            
##       Balanced Accuracy : 0.8459            
##                                             
##        'Positive' Class : 0                 
## 
######### test on holdout sample
# subset predictors and criterion
x_test = as.matrix(test.ml[,-c(1,2,3,4)])
y_test = as.double(as.matrix(test.ml[, 4]))

# test on holdout sample
pred_test = predict(cv.train, newx = x_test, s=cv.train$lambda.1se, type="response")
pred_test[pred_test > .03] = 1
pred_test[pred_test < .03] = 0

# confusion matrix
confusionMatrix(pred_test, y_test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 1846    7
##          1   40   33
##                                          
##                Accuracy : 0.9756         
##                  95% CI : (0.9677, 0.982)
##     No Information Rate : 0.9792         
##     P-Value [Acc > NIR] : 0.8828         
##                                          
##                   Kappa : 0.5726         
##  Mcnemar's Test P-Value : 0.000003046    
##                                          
##             Sensitivity : 0.9788         
##             Specificity : 0.8250         
##          Pos Pred Value : 0.9962         
##          Neg Pred Value : 0.4521         
##              Prevalence : 0.9792         
##          Detection Rate : 0.9585         
##    Detection Prevalence : 0.9621         
##       Balanced Accuracy : 0.9019         
##                                          
##        'Positive' Class : 0              
## 
######### logistic regression
## note: couldn't get predict function to predict test values
# log = glm(artifact ~ volMean + volSD + euclidian_trans + euclidian_rot + tile_1 + tile_2 + tile_3 + tile_4 + tile_5 + tile_6 + tile_7 + tile_8 + tile_9 + tile_10 + tile_11, family='binomial', data=train)
# 
# pred = predict(log, newx = train, type="response")
# predicted = prediction(pred, y_train, label.ordering = NULL)
# perf = performance(predicted, measure = "sens", x.measure = "spec")
# perf.df = data.frame(cut=perf@alpha.values[[1]],sens=perf@x.values[[1]],spec=perf@y.values[[1]])
# ggplot(perf.df, aes(spec, sens)) +
#   geom_line()
# 
# ggplot(perf.df, aes(x = cut)) +
#   geom_line(aes(y = sens, color = "sensitivity")) + 
#   geom_line(aes(y = spec, color = "specificity")) +
#   geom_vline(xintercept = .03)
# 
# pred = predict(log, newx = x_test, type="response")
# pred[pred > .03] = 1
# pred[pred < .03] = 0
# confusionMatrix(pred, y_test)

svm

train.svm = train.ml[,-c(1,2,3)] %>%
  mutate(artifact = ifelse(artifact == 1, "yes","no"),
         artifact = as.factor(artifact))
test.svm = test.ml[,-c(1,2,3)] %>%
  mutate(artifact = ifelse(artifact == 1, "yes","no"),
         artifact = as.factor(artifact))

# specify control parameters
fitControl = trainControl(method = "repeatedcv", number = 10, repeats = 3, classProbs = TRUE)

# run initial model
set.seed(101)
svmFit = train(artifact ~ ., 
               data = train.svm, 
               method = "svmLinear", 
               trControl = fitControl,
               preProcess = c("center", "scale"),
               tuneLength = 10,
               metric = "ROC",
               verbose = FALSE)
svmFit$finalModel
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Linear (vanilla) kernel function. 
## 
## Number of Support Vectors : 157 
## 
## Objective Function Value : -150.248 
## Training error : 0.010215 
## Probability model included.
# predict model
train_pred = predict(svmFit, newdata = train.svm, type="prob") %>%
  select(-no)

# plot cutoff v. accuracy
predicted = prediction(train_pred, train.svm$artifact, label.ordering = NULL)
perf = performance(predicted, measure = "acc")
perf.df = data.frame(cut=perf@x.values[[1]],acc=perf@y.values[[1]])

ggplot(perf.df, aes(cut, acc)) +
  geom_line()

# plot false v. true positive rate
perf = performance(predicted, measure = "tpr", x.measure = "fpr")
perf.df = data.frame(cut=perf@alpha.values[[1]],fpr=perf@x.values[[1]],tpr=perf@y.values[[1]])

ggplot(perf.df, aes(fpr, tpr)) +
  geom_line()

# plot specificity v. sensitivity
perf = performance(predicted, measure = "sens", x.measure = "spec")
perf.df = data.frame(cut=perf@alpha.values[[1]],sens=perf@x.values[[1]],spec=perf@y.values[[1]])
ggplot(perf.df, aes(spec, sens)) +
  geom_line()

ggplot(perf.df, aes(x = cut)) +
  geom_line(aes(y = sens, color = "sensitivity")) + 
  geom_line(aes(y = spec, color = "specificity"))

cut = perf@alpha.values[[1]][which.max(perf@x.values[[1]]+perf@y.values[[1]])]
ss = max(perf@x.values[[1]]+perf@y.values[[1]]) # sensitivity + specificity

# cut and assess accuracy in training sample
train_pred = predict(svmFit, newdata = train.svm, type="prob") %>%
  select(-no)
train_pred =  as.matrix(train_pred)
train_pred[train_pred > .07] = "yes"
train_pred[train_pred < .07] = "no"
confusionMatrix(train_pred, train.svm$artifact)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  5612   36
##        yes   45   83
##                                           
##                Accuracy : 0.986           
##                  95% CI : (0.9826, 0.9888)
##     No Information Rate : 0.9794          
##     P-Value [Acc > NIR] : 0.0001236       
##                                           
##                   Kappa : 0.6649          
##  Mcnemar's Test P-Value : 0.3740628       
##                                           
##             Sensitivity : 0.9920          
##             Specificity : 0.6975          
##          Pos Pred Value : 0.9936          
##          Neg Pred Value : 0.6484          
##              Prevalence : 0.9794          
##          Detection Rate : 0.9716          
##    Detection Prevalence : 0.9778          
##       Balanced Accuracy : 0.8448          
##                                           
##        'Positive' Class : no              
## 
# cut and assess accuracy in test sample
test_pred = predict(svmFit, newdata = test.svm, type="prob") %>%
  select(-no)
test_pred =  as.matrix(test_pred)
test_pred[test_pred > .07] = "yes"
test_pred[test_pred < .07] = "no"
confusionMatrix(test_pred, test.svm$artifact)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  1867   11
##        yes   19   29
##                                           
##                Accuracy : 0.9844          
##                  95% CI : (0.9778, 0.9895)
##     No Information Rate : 0.9792          
##     P-Value [Acc > NIR] : 0.05977         
##                                           
##                   Kappa : 0.6512          
##  Mcnemar's Test P-Value : 0.20124         
##                                           
##             Sensitivity : 0.9899          
##             Specificity : 0.7250          
##          Pos Pred Value : 0.9941          
##          Neg Pred Value : 0.6042          
##              Prevalence : 0.9792          
##          Detection Rate : 0.9694          
##    Detection Prevalence : 0.9751          
##       Balanced Accuracy : 0.8575          
##                                           
##        'Positive' Class : no              
## 
### weighted model
# create model weights (they sum to one)
model_weights = ifelse(train.svm$artifact == "yes",
                        (1/table(train.svm$artifact)[1]) * 0.5,
                        (1/table(train.svm$artifact)[2]) * 0.5)

# use the same seed to ensure same cross-validation splits
fitControl$seeds = svmFit$control$seeds

svmFit_weighted = train(artifact ~ .,
               data = train.svm,
               method = "svmLinear",
               trControl = fitControl,
               preProcess = c("center", "scale"),
               tuneLength = 10,
               metric = "ROC",
               verbose = FALSE,
               weights = model_weights)

svmFit_weighted$finalModel
## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Linear (vanilla) kernel function. 
## 
## Number of Support Vectors : 157 
## 
## Objective Function Value : -150.248 
## Training error : 0.010215 
## Probability model included.
test_pred_weighted = predict(svmFit_weighted, newdata = test.svm, type="prob") %>%
  select(-no)
test_pred_weighted =  as.matrix(test_pred_weighted)
test_pred_weighted[test_pred_weighted > .07] = "yes"
test_pred_weighted[test_pred_weighted < .07] = "no"
confusionMatrix(test_pred_weighted, test.svm$artifact)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  1867   11
##        yes   19   29
##                                           
##                Accuracy : 0.9844          
##                  95% CI : (0.9778, 0.9895)
##     No Information Rate : 0.9792          
##     P-Value [Acc > NIR] : 0.05977         
##                                           
##                   Kappa : 0.6512          
##  Mcnemar's Test P-Value : 0.20124         
##                                           
##             Sensitivity : 0.9899          
##             Specificity : 0.7250          
##          Pos Pred Value : 0.9941          
##          Neg Pred Value : 0.6042          
##              Prevalence : 0.9792          
##          Detection Rate : 0.9694          
##    Detection Prevalence : 0.9751          
##       Balanced Accuracy : 0.8575          
##                                           
##        'Positive' Class : no              
## 
# # determine best cost parameter
# grid <- expand.grid(C = c(0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 5))
# set.seed(3233)
# svm_Linear_Grid <- train(artifact ~ ., 
#                          data = train.svm, method = "svmLinear",
#                          trControl=fitControl,
#                          preProcess = c("center", "scale"),
#                          tuneGrid = grid,
#                          tuneLength = 10)
# svm_Linear_Grid
# plot(svm_Linear_Grid)
# test_pred_grid = predict(svm_Linear_Grid, newdata = test.svm)
# confusionMatrix(test_pred_grid, test.svm$artifact)

auto-motion process

training

train.man = training %>%
  gather(tile, freqtile_power, starts_with("tile")) %>%
  filter(tile %in% c("tile_1", "tile_10")) %>%
  
  # code trash based on mean, sd, and rp
  ungroup %>%
  mutate(meanDiff.mean = mean(Diff.mean, na.rm=TRUE),
         sdDiff.mean = sd(Diff.mean, na.rm=TRUE),
         meanDiff.sd = mean(Diff.sd, na.rm=TRUE),
         sdDiff.sd = sd(Diff.sd, na.rm=TRUE),
         
         # code volumes above mean thresholds as trash
         upper.mean = meanDiff.mean + 2*sdDiff.mean,
         lower.mean = meanDiff.mean - 2*sdDiff.mean,
         trash.mean = ifelse(Diff.mean > upper.mean | Diff.mean < lower.mean, 1, 0),
         
         upper.sd = meanDiff.sd + 2*sdDiff.sd,
         lower.sd = meanDiff.sd - 2*sdDiff.sd,
         trash.sd = ifelse(Diff.sd > upper.sd | Diff.sd < lower.sd, 1, 0),
         
         # code volumes with more than +/- .25mm translation or rotation in Euclidian distance
         trash.rp.tr = ifelse(euclidian_trans_deriv > .25 | euclidian_trans_deriv < -.25, 1, trash.rp),
         trash.rp.rot = ifelse(euclidian_rot_deriv > .25 | euclidian_rot_deriv < -.25, 1, trash.rp)) %>%
  select(-meanDiff.mean, -meanDiff.sd, -sdDiff.mean, -sdDiff.sd) %>%
  
  # code trash based on striping
  group_by(subjectID, run, tile) %>%
  mutate(freqtile_power_c = freqtile_power - mean(freqtile_power, na.rm=TRUE)) %>%
  ungroup() %>%
  select(-freqtile_power) %>%
  spread(tile,freqtile_power_c) %>%
  mutate(trash.stripe = ifelse(tile_1 < -.035 & tile_10 > .00025, 1, 0)) %>%
  
  # combine trash
  mutate(trash.combined = ifelse(trash.stripe == 1, 1, 0),
         trash.combined = ifelse((trash.rp.tr + trash.rp.rot + trash.mean + trash.sd) > 1, 1, trash.combined)) %>%
         #trash.combined = ifelse(lead(trash.combined) == TRUE, TRUE, trash.combined)) %>%
  
  # recode as trash if volume behind and in front are both marked as trash
  mutate(trash.combined = ifelse(trash.combined == 0 & lag(trash.combined) == 1 & lead(trash.combined) == 1, 1, trash.combined)) %>%
         
  # code first volume as trash if second volume is trash
  mutate(trash.combined = ifelse(volume == 1 & lead(trash.combined) == 1, 1, trash.combined)) %>%

  # code hits
  mutate(hits = ifelse(trash.combined == 1 & (artifact == 1), "hit",
                #ifelse(trash.combined == 1 & (artifact == 1), "hit.light",
                ifelse(trash.combined == 0 & (artifact == 1), "neg",
                #ifelse(trash.combined == 0 & (artifact == 1), "neg.light",
                ifelse(trash.combined == 1 & (artifact == 0), "pos", NA))),
         label = ifelse(regexpr('.*', hits), as.character(volume), ''),
         hits = as.factor(hits)) %>%
  gather(tile, freqtile_power_c, c("tile_1", "tile_10"))

testing

test.man = testing %>%
  gather(tile, freqtile_power, starts_with("tile")) %>%
  filter(tile %in% c("tile_1", "tile_10")) %>%
  
  # code trash based on mean, sd, and rp 
  ungroup %>%
  mutate(meanDiff.mean = mean(Diff.mean, na.rm=TRUE),
         sdDiff.mean = sd(Diff.mean, na.rm=TRUE),
         meanDiff.sd = mean(Diff.sd, na.rm=TRUE),
         sdDiff.sd = sd(Diff.sd, na.rm=TRUE),
         
         # code volumes above mean thresholds as trash
         upper.mean = meanDiff.mean + 2*sdDiff.mean,
         lower.mean = meanDiff.mean - 2*sdDiff.mean,
         trash.mean = ifelse(Diff.mean > upper.mean | Diff.mean < lower.mean, 1, 0),
         
         upper.sd = meanDiff.sd + 2*sdDiff.sd,
         lower.sd = meanDiff.sd - 2*sdDiff.sd,
         trash.sd = ifelse(Diff.sd > upper.sd | Diff.sd < lower.sd, 1, 0),
         
         # code volumes with more than +/- .25mm translation or rotation in Euclidian distance
         trash.rp.tr = ifelse(euclidian_trans_deriv > .25 | euclidian_trans_deriv < -.25, 1, trash.rp),
         trash.rp.rot = ifelse(euclidian_rot_deriv > .25 | euclidian_rot_deriv < -.25, 1, trash.rp)) %>%
  select(-meanDiff.mean, -meanDiff.sd, -sdDiff.mean, -sdDiff.sd) %>%
  
  # code trash based on striping
  group_by(subjectID, run, tile) %>%
  mutate(freqtile_power_c = freqtile_power - mean(freqtile_power, na.rm=TRUE)) %>%
  ungroup() %>%
  select(-freqtile_power) %>%
  spread(tile,freqtile_power_c) %>%
  mutate(trash.stripe = ifelse(tile_1 < -.035 & tile_10 > .00025, 1, 0)) %>%
  
  # combine trash
  mutate(trash.combined = ifelse(trash.stripe == 1, 1, 0),
         trash.combined = ifelse((trash.rp.tr + trash.rp.rot + trash.mean + trash.sd) > 1, 1, trash.combined)) %>%
         #trash.combined = ifelse(lead(trash.combined) == TRUE, TRUE, trash.combined)) %>%
  
  # recode as trash if volume behind and in front are both marked as trash
  mutate(trash.combined = ifelse(trash.combined == 0 & lag(trash.combined) == 1 & lead(trash.combined) == 1, 1, trash.combined)) %>%
         
  # code first volume as trash if second volume is trash
  mutate(trash.combined = ifelse(volume == 1 & lead(trash.combined) == 1, 1, trash.combined)) %>%

  # code hits
  mutate(hits = ifelse(trash.combined == 1 & (artifact == 1), "hit",
                #ifelse(trash.combined == 1 & (artifact == 1), "hit.light",
                ifelse(trash.combined == 0 & (artifact == 1), "neg",
                #ifelse(trash.combined == 0 & (artifact == 1), "neg.light",
                ifelse(trash.combined == 1 & (artifact == 0), "pos", NA))),
         label = ifelse(regexpr('.*', hits), as.character(volume), ''),
         hits = as.factor(hits)) %>%
  gather(tile, freqtile_power_c, c("tile_1", "tile_10"))

compare hit rates

training data

# select only one set of observations and code any artifact as 1
train.tab = train.man %>% 
  filter(tile == "tile_1") %>%
  mutate(hits.tot = ifelse(hits %in% c("hit", "hit.light"), "hit",
                    ifelse(hits %in% c("neg", "neg.light"), "neg",
                    ifelse(hits %in% c("pos"), "pos",
                    ifelse(is.na(hits), "cor.rej", NA)))))

lasso logistic regression

confusionMatrix(pred_train, y_train)$table
##           Reference
## Prediction    0    1
##          0 5530   34
##          1  127   85
confusionMatrix(pred_train, y_train)$overall[1]
## Accuracy 
## 0.972126
confusionMatrix(pred_train, y_train)$byClass[11]
## Balanced Accuracy 
##         0.8459178

svm

confusionMatrix(train_pred, train.svm$artifact)$table
##           Reference
## Prediction   no  yes
##        no  5612   36
##        yes   45   83
confusionMatrix(train_pred, train.svm$artifact)$overall[1]
##  Accuracy 
## 0.9859765
confusionMatrix(train_pred, train.svm$artifact)$byClass[11]
## Balanced Accuracy 
##         0.8447621

manual

table(train.tab$hits.tot)
## 
## cor.rej     hit     neg     pos 
##    5609      90      28      49
#table(comp.train$hits)

cor.rej = table(train.tab$hits.tot)[[1]]
hit = table(train.tab$hits.tot)[[2]]
neg = table(train.tab$hits.tot)[[3]]
pos = table(train.tab$hits.tot)[[4]]
total = cor.rej + hit + neg + pos

sprintf('Accuracy: %s', round((cor.rej + hit) / total,2))
## [1] "Accuracy: 0.99"
# balanced accuracy
sprintf('Balanced accuracy: %s', round(((cor.rej / (cor.rej + pos)) + (hit / (hit + neg))) / 2,2))
## [1] "Balanced accuracy: 0.88"

test data

test.tab = test.man %>% 
  filter(tile == "tile_1") %>%
  mutate(hits.tot = ifelse(hits %in% c("hit", "hit.light"), "hit",
                    ifelse(hits %in% c("neg", "neg.light"), "neg",
                    ifelse(hits %in% c("pos"), "pos",
                    ifelse(is.na(hits), "cor.rej", NA)))))

lasso logistic regression

confusionMatrix(pred_test, y_test)$table
##           Reference
## Prediction    0    1
##          0 1846    7
##          1   40   33
confusionMatrix(pred_test, y_test)$overall[1]
##  Accuracy 
## 0.9755971
confusionMatrix(pred_test, y_test)$byClass[11]
## Balanced Accuracy 
##         0.9018955

svm

confusionMatrix(test_pred, test.svm$artifact)$table
##           Reference
## Prediction   no  yes
##        no  1867   11
##        yes   19   29
confusionMatrix(test_pred, test.svm$artifact)$overall[1]
##  Accuracy 
## 0.9844237
confusionMatrix(test_pred, test.svm$artifact)$byClass[11]
## Balanced Accuracy 
##         0.8574629

manual

table(test.tab$hits.tot)
## 
## cor.rej     hit     neg     pos 
##    1874      35       4      13
#table(comp.test$hits)

cor.rej = table(test.tab$hits.tot)[[1]]
hit = table(test.tab$hits.tot)[[2]]
neg = table(test.tab$hits.tot)[[3]]
pos = table(test.tab$hits.tot)[[4]]
total = cor.rej + hit + neg + pos

sprintf('Accuracy: %s', round((cor.rej + hit) / total,2))
## [1] "Accuracy: 0.99"
# balanced accuracy
sprintf('Balanced accuracy: %s', round(((cor.rej / (cor.rej + pos)) + (hit / (hit + neg))) / 2,2))
## [1] "Balanced accuracy: 0.95"

plot composite

y = translation derivative

comp = bind_rows(train.man, test.man)

thresholds = comp %>% 
  filter(tile == "tile_1") %>%
  select(subjectID, run) %>% 
  unique(.) %>% 
  mutate(upper = .25,
         lower = -.25)

# ggplot(filter(comp, tile == "tile_1"), aes(x = volume, y = euclidian_trans_deriv)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(comp, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 1.5) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#EBCC2A","#F21A00")) +
#   #scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

y = mean intensity

thresholds = comp %>% 
  filter(tile == "tile_1") %>%
  select(subjectID, run, upper.mean, lower.mean) %>% 
  unique(.) %>%
  mutate(upper = upper.mean,
         lower = lower.mean)

# ggplot(filter(comp, tile == "tile_1"), aes(x = volume, y = Diff.mean)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(comp, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 1.5) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#EBCC2A","#F21A00")) +
#   #scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

y = freqtile power

thresholds = comp %>% 
  select(subjectID, run, tile) %>% 
  unique(.) %>% 
  mutate(y = ifelse(tile == 1, -.035, .00025))

# ggplot(comp, aes(x = volume, y = freqtile_power_c)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(comp, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 1.5) +
#   facet_wrap(~ subjectID + run + tile, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#EBCC2A","#F21A00")) +
#   #scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = y), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

plot machine learning

logistic regression, y = translation derivative

data = bind_rows(train.ml,test.ml)

# re-run lasso logistic regression on full sample
x = as.matrix(data[,-c(1,2,3,4)])
y = as.double(as.matrix(data[, 4]))
pred = predict(cv.train, newx = x, s=cv.train$lambda.1se, type="response")
pred[pred > .03] = 1
pred[pred < .03] = 0

data.plot.log = bind_cols(data, as.data.frame(y), as.data.frame(pred)) %>%
  mutate(hits = ifelse(y == 1 & `1` == 1, "hit",
                ifelse(y == 0 & `1` == 1, "pos",
                ifelse(y == 1 & `1` == 0, "neg", NA))),
         label = ifelse(regexpr('.*', hits), as.character(volume), ''),
         hits = as.factor(hits))
  
# ggplot(data.plot.log, aes(x = volume, y = euclidian_trans_deriv)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(data.plot.log, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 1.5) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#EBCC2A","#F21A00")) +
#   theme(axis.text.x = element_text(size = 6))

svm, y = translation derivative

full_svm = bind_rows(train.svm,test.svm)

# re-run svm on full sample
fitControl = trainControl(method = "repeatedcv", number = 10, repeats = 3, classProbs = TRUE)
set.seed(101)
full_svmFit = train(artifact ~ ., 
               data = full_svm, 
               method = "svmLinear", 
               trControl = fitControl,
               preProcess = c("center", "scale"),
               tuneLength = 10,
               metric = "ROC",
               verbose = FALSE)

#full_pred = predict(full_svmFit, newdata = full_svm)
full_pred = predict(full_svmFit, newdata = full_svm, type="prob") %>%
  select(-no)
full_pred =  as.matrix(full_pred)
full_pred[full_pred > .07] = "yes"
full_pred[full_pred < .07] = "no"
confusionMatrix(full_pred, full_svm$artifact)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  7474   41
##        yes   69  118
##                                           
##                Accuracy : 0.9857          
##                  95% CI : (0.9828, 0.9882)
##     No Information Rate : 0.9794          
##     P-Value [Acc > NIR] : 0.00002099      
##                                           
##                   Kappa : 0.6748          
##  Mcnemar's Test P-Value : 0.01004         
##                                           
##             Sensitivity : 0.9909          
##             Specificity : 0.7421          
##          Pos Pred Value : 0.9945          
##          Neg Pred Value : 0.6310          
##              Prevalence : 0.9794          
##          Detection Rate : 0.9704          
##    Detection Prevalence : 0.9757          
##       Balanced Accuracy : 0.8665          
##                                           
##        'Positive' Class : no              
## 
data.plot.svm = bind_cols(data, as.data.frame(full_pred)) %>%
  mutate(hits = ifelse(artifact == 1 & full_pred == "yes", "hit",
                ifelse(artifact == 0 & full_pred == "yes", "pos",
                ifelse(artifact == 1 & full_pred == "no", "neg", NA))),
         label = ifelse(regexpr('.*', hits), as.character(volume), ''),
         hits = as.factor(hits))
  
# ggplot(data.plot.svm, aes(x = volume, y = euclidian_trans_deriv)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(data.plot.svm, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 1.5) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#EBCC2A","#F21A00")) +
#   theme(axis.text.x = element_text(size = 6))

plot and compare models

plot.comp = comp %>%
  filter(tile == "tile_1") %>%
  select(subjectID, run, volume, euclidian_trans_deriv, hits) %>%
  rename("auto" = hits) %>%
  left_join(data.plot.log, by = c("subjectID", "run", "volume", "euclidian_trans_deriv")) %>%
  select(subjectID, run, volume, euclidian_trans_deriv, auto, hits) %>%
  rename("log" = hits) %>%
  left_join(data.plot.svm, by = c("subjectID", "run", "volume", "euclidian_trans_deriv")) %>%
  select(subjectID, run, volume, euclidian_trans_deriv, auto, log, hits, label) %>%
  rename("svm" = hits) %>%
  gather(model, hits, c("auto", "log", "svm"))

ggplot(plot.comp, aes(x = volume, y = euclidian_trans_deriv)) +
  geom_line(size = .25) +
  geom_point(data = subset(plot.comp, !is.na(hits)), aes(color = hits), size = 2.5) +
  geom_text(aes(label = label), size = 1.5) +
  facet_wrap(~ subjectID + run + model, ncol = 9, scales = "free") +
  scale_color_manual(values = c("#3B9AB2","#EBCC2A","#F21A00")) +
  theme(axis.text.x = element_text(size = 6))

old code

apply intensity and rp thresholds

# auto = joined %>%
#   group_by(subjectID, run) %>%
#   mutate(Diff.mean = volMean - lag(volMean),
#          Diff.sd = volSD - lag(volSD)) %>%
#   filter(tile == 1 | tile == 10) %>%
#   ungroup %>%
#   mutate(meanDiff.mean = mean(Diff.mean, na.rm=TRUE),
#          sdDiff.mean = sd(Diff.mean, na.rm=TRUE),
#          meanDiff.sd = mean(Diff.sd, na.rm=TRUE),
#          sdDiff.sd = sd(Diff.sd, na.rm=TRUE),
#          
#          # code volumes above mean thresholds as trash
#          trash.mean = ifelse(Diff.mean > (meanDiff.mean + 3*sdDiff.mean) | Diff.mean < (meanDiff.mean - 1.5*sdDiff.mean), 1, 0),
#          trash.sd = ifelse(Diff.sd > (meanDiff.sd + 3*sdDiff.sd) | Diff.sd < (meanDiff.sd - 3*sdDiff.sd), 1, 0),
#          
#          # code volumes with more than +/- .3mm translation in Euclidian distance
#          trash.rp = ifelse(euclidian_trans_deriv > .3 | euclidian_trans_deriv < -.3, 1, trash.rp),
#          # code volumes with more than +/- .3mm translation in Euclidian distance
#          trash.rp = ifelse(euclidian_rot_deriv > .3 | euclidian_rot_deriv < -.3, 1, trash.rp),
#          
#          # recode as trash if volume behind and in front are both marked as trash
#          trash.mean = ifelse(trash.mean == 0 & lag(trash.mean) == 1 & lead(trash.mean) == 1, 1, trash.mean),
#          trash.sd = ifelse(trash.sd == 0 & lag(trash.sd) == 1 & lead(trash.sd) == 1, 1, trash.sd),
#          trash.rp = ifelse(trash.rp == 0 & lag(trash.rp) == 1 & lead(trash.rp) == 1, 1, trash.rp),
#          
#          # code first volume as trash if second volume is trash
#          trash.mean = ifelse(volume == 1 & lead(trash.mean) == 1, 1, trash.mean),
#          trash.sd = ifelse(volume == 1 & lead(trash.sd) == 1, 1, trash.sd),
#          trash.rp = ifelse(volume == 1 & lead(trash.rp) == 1, 1, trash.rp)) %>%
#   
#   mutate(trash.mean = ifelse(is.na(trash.mean), 0, trash.mean),
#          trash.sd = ifelse(is.na(trash.sd), 0, trash.sd),
#          hits.rp = ifelse(trash.rp == 1 & (striping == 2 | intensity == 2), "hit",
#                 ifelse(trash.rp == 1 & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(trash.rp == 1 & (striping == 0 | intensity == 0), "pos",
#                 ifelse(trash.rp == 0 & (striping == 2 | intensity == 2), "neg",
#                 ifelse(trash.rp == 0 & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          hits.mean = ifelse(trash.mean == 1 & (striping == 2 | intensity == 2), "hit",
#                 ifelse(trash.mean == 1 & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(trash.mean == 1 & (striping == 0 | intensity == 0), "pos",
#                 ifelse(trash.mean == 0 & (striping == 2 | intensity == 2), "neg",
#                 ifelse(trash.mean == 0 & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          hits.sd = ifelse(trash.sd == 1 & (striping == 2 | intensity == 2), "hit",
#                 ifelse(trash.sd == 1 & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(trash.sd == 1 & (striping == 0 | intensity == 0), "pos",
#                 ifelse(trash.sd == 0 & (striping == 2 | intensity == 2), "neg",
#                 ifelse(trash.sd == 0 & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          label = ifelse(regexpr(".*", hits.rp) | regexpr(".*", hits.mean) | regexpr(".*", hits.sd), as.character(volume), '')) %>%
#   select(subjectID, run, volume, Diff.mean, Diff.sd, volMean, volSD, starts_with("euclidian"), hits.mean, hits.sd, hits.rp, tile, label)

apply absolute thresholds and plot

# test = joined %>%
#   group_by(subjectID, run, tile) %>%
#   mutate(freqtile_power_c = freqtile_power - mean(freqtile_power)) %>%
#   ungroup() %>%
#   filter(tile == 1 | tile == 10) %>%
#   select(-freqtile_power) %>%
#   spread(tile,freqtile_power_c) %>%
#   mutate(red_zone = ifelse(`1` < -.03 & `10` > .0002, TRUE, FALSE),
#          red_zone = ifelse(lead(red_zone) == TRUE, TRUE, red_zone),
#          hits = ifelse(red_zone == TRUE & (striping == 2 | intensity == 2), "hit",
#                 ifelse(red_zone == TRUE & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(red_zone == TRUE & (striping == 0 | intensity == 0), "pos",
#                 ifelse(red_zone == FALSE & (striping == 2 | intensity == 2), "neg",
#                 ifelse(red_zone == FALSE & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          label = ifelse(hits, as.character(volume), ''),
#          hits = as.factor(hits)) %>%
#   gather(tile, freqtile_power_c, c(`1`, `10`)) %>%
#   mutate(tile = as.numeric(tile))

apply threshold using SDs and plot

# test_diff = joined %>%
#   group_by(subjectID, run, tile) %>%
#   mutate(freqtile_power_c = freqtile_power - mean(freqtile_power),
#          diff = freqtile_power - lag(freqtile_power)) %>%
#   select(-freqtile_power, -freqtile_power_c) %>%
#   filter(tile == 1 | tile == 10) %>%
#   spread(tile,diff) %>%
#   ungroup() %>%
#   mutate(upper_1 = (mean(`1`,na.rm=TRUE) + 1.25*sd(`1`, na.rm=TRUE)),
#          lower_1 = (mean(`1`,na.rm=TRUE) - 1.25*sd(`1`, na.rm=TRUE)),
#          upper_10 = (mean(`10`,na.rm=TRUE) + 1.25*sd(`10`, na.rm=TRUE)),
#          lower_10 = (mean(`10`,na.rm=TRUE) - 1.25*sd(`10`, na.rm=TRUE))) %>%
#   mutate(red_zone_1 = ifelse(`1` > upper_1 | `1` < lower_1, TRUE, FALSE),
#          red_zone_10 = ifelse(`10` > upper_10 | `10` < lower_10, TRUE, FALSE),
#          red_zone = ifelse(red_zone_1 == TRUE & red_zone_10 == TRUE, TRUE, FALSE),
#          red_zone = ifelse(lead(red_zone) == TRUE, TRUE, red_zone),
#          red_zone = ifelse(is.na(red_zone), 0, red_zone),
#          hits = ifelse(red_zone == TRUE & (striping == 2 | intensity == 2), "hit",
#                 ifelse(red_zone == TRUE & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(red_zone == TRUE & (striping == 0 | intensity == 0), "pos",
#                 ifelse(red_zone == FALSE & (striping == 2 | intensity == 2), "neg",
#                 ifelse(red_zone == FALSE & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          label = ifelse(hits, as.character(volume), ''),
#          hits = as.factor(hits)) %>%
#   gather(tile, diff, c(`1`, `10`)) %>%
#   mutate(tile = as.numeric(tile))

mean intensities

# ggplot(filter(auto, tile == 1), aes(x = volume, y = volMean)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(auto, !is.na(hits.mean)), aes(color = hits.mean), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   #geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   #geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

sd intensities

# ggplot(filter(auto, tile == 1), aes(x = volume, y = volSD)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(auto, !is.na(hits.sd)), aes(color = hits.sd), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   #geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   #geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

translation

# ggplot(filter(auto, tile == 1), aes(x = volume, y = euclidian_trans_deriv)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(auto, !is.na(hits.rp)), aes(color = hits.rp), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   #geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   #geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

freqtile power

# thresholds = test %>% select(subjectID, run, tile) %>% unique(.) %>% mutate(y = ifelse(tile == 1, -.03, .0002))
# 
# ggplot(test, aes(x = volume, y = freqtile_power_c)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(test, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~subjectID + run + tile, scales = "free", ncol = 4) +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = y), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

freqtile SD

# thresholds = test_diff %>% 
#   select(subjectID, run, tile, diff) %>% 
#   group_by(tile) %>%
#   mutate(upper = mean(diff, na.rm=TRUE) + 1*sd(diff, na.rm=TRUE),
#          lower = mean(diff, na.rm=TRUE) - 1*sd(diff, na.rm=TRUE)) %>%
#   select(-diff) %>%
#   unique(.)
# 
# ggplot(test_diff, aes(x = volume, y = diff)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(test_diff, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run + tile, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))